#!/usr/bin/env python


__author__= 'yingnn'

'''methylation fitting using AdaBoost, scored by cross-validation'''


import sys


# Command-line handling.
#   argv[1]  fname    path of the CSV file to read
#   argv[2]  n_trees  number of boosting estimators (converted to int)
# Any further arguments are ignored.
if len(sys.argv) < 3:
    # sys.exit(str) writes the message to stderr and exits with status 1, so
    # a missing argument is reported as a failure instead of a silent success.
    sys.exit(
        "%s fname n_trees\n200 trees recommended as a start." % sys.argv[0]
    )

fname = sys.argv[1]
try:
    n_estimator = int(sys.argv[2])
except ValueError:
    # Give a readable message instead of a bare traceback for input such as
    # "200.5" or "many".
    sys.exit("n_trees must be an integer, got %r" % sys.argv[2])
# n_trees=sys.argv[2]
# jobs=int(sys.argv[3])
# n_col_sampling=int(sys.argv[4])

# These modules must be installed. If an import fails, try extending the module search path, e.g. "export PYTHONPATH=$PYTHONPATH:/mnt/ilustre/app/medical/tools/py_module"
import pandas as pd
import numpy as np
# import random
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier as abc


# Load the input table. The first column is used as the class label and every
# remaining column as a feature (see the iloc slices below), i.e. one sample
# per row. If the file stores samples in columns, transpose it first
# (dat = dat.transpose()).
dat = pd.read_csv(fname)

# The column-wise dropna below removes ANY column that contains a missing
# value. If that happened to the label column, the first feature column would
# silently take its place, so refuse to continue in that case.
if dat.iloc[:, 0].isnull().any():
    raise ValueError(
        "label column (first column) of %s contains missing values" % fname
    )

# Keep only the columns that are complete for every row.
dat1 = dat.dropna(axis=1, how='any')

# Cross-validate an AdaBoost classifier with the requested number of
# estimators, using scikit-learn's default splitting and scoring.
abc1 = abc(n_estimators=n_estimator)
scores = cross_val_score(abc1, dat1.iloc[:, 1:], dat1.iloc[:, 0])
mean_score = scores.mean()

# Append "<mean score>\t<input file>" so that repeated runs accumulate in the
# same file; the context manager closes it even if the write fails.
with open('score_ab.txt', 'a') as f:
    f.write("\t".join([str(mean_score), fname]) + "\n")
